# 1.1 Table 1

rm(list=ls())
# Load necessary libraries
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(openxlsx)
library(extrafont)
library(reshape2)
library(car) 
library(cowplot)
# Register fonts with the Windows graphics device.
# The "win" device is only available on Windows, so the call is skipped on
# other platforms instead of stopping the whole script.
if (.Platform$OS.type == "windows") {
  loadfonts(device = "win")
}

# Names of all worksheets in the workbook; the loops further down iterate
# over this vector.
sheet_names <- excel_sheets("Data.xlsx")
# Display the sheet names
#print(sheet_names)


# Read every worksheet of an Excel workbook and bind each one to a variable
# named after the sheet.
#
# Arguments:
#   file_path  Path to the .xlsx workbook.
#   envir      Environment that receives one data frame per sheet. Defaults to
#              the global environment, which is what the rest of this script
#              (the `get(sheet)` loops) relies on.
#
# Returns (invisibly) a named list with one data frame per sheet, so callers
# can work with the data without going through global variables.
#
# Rows whose "Con" label is missing are labelled "rep1", "rep2", ... in row
# order, so that they can be told apart from the "Average"/"STDEV" rows.
read_and_assign_sheets <- function(file_path, envir = .GlobalEnv) {
  sheet_names <- excel_sheets(file_path)

  sheets <- vector("list", length(sheet_names))
  names(sheets) <- sheet_names

  for (sheet in sheet_names) {
    data <- read.xlsx(file_path, sheet = sheet, colNames = TRUE)

    # Fail with a readable message instead of an obscure replacement-length
    # error when a sheet does not follow the expected layout.
    if (!"Con" %in% names(data)) {
      stop("Sheet '", sheet, "' has no 'Con' column", call. = FALSE)
    }

    # Only touch the column when there is something to label; this also avoids
    # a needless type change of 'Con' when no label is missing.
    unlabelled <- is.na(data$Con)
    if (any(unlabelled)) {
      data$Con[unlabelled] <- paste0("rep", seq_len(sum(unlabelled)))
    }

    assign(sheet, data, envir = envir)
    sheets[[sheet]] <- data
  }

  invisible(sheets)
}

# Workbook to analyse. The call below creates one global data frame per
# worksheet (named after the sheet); the analysis loops later fetch them
# with get().
file_path <- "Data.xlsx"
read_and_assign_sheets(file_path)
# Check the ANOVA assumptions for one sheet.
#
# Arguments:
#   data  Data frame with a "Con" label column and one column per
#         concentration level. Rows labelled "Average" or "STDEV" are
#         summary rows and are excluded.
#
# Returns a list with
#   normality_p_values  named numeric vector: Shapiro-Wilk p-value for each
#                       concentration column (NA_real_ when a column has fewer
#                       than 3 usable values, the minimum shapiro.test accepts)
#   levene_test         result of car::leveneTest(Value ~ Concentration)
perform_tests <- function(data) {
  # Exclude rows where "Con" is "Average" or "STDEV"
  clean_data <- data %>%
    filter(!Con %in% c("Average", "STDEV"))

  # A single text cell makes the reader import the whole column as character.
  # Coerce every measurement column up front so that such a column is still
  # tested instead of being silently skipped. Columns that are already numeric
  # are passed through untouched to avoid a lossy round trip through text.
  to_numeric <- function(x) {
    if (is.numeric(x)) x else as.numeric(as.character(x))
  }
  measure_cols <- setdiff(names(clean_data), "Con")
  clean_data[measure_cols] <- lapply(clean_data[measure_cols], to_numeric)

  # Shapiro-Wilk test per concentration column. vapply() works column by
  # column and guarantees a numeric result; shapiro.test() ignores NAs itself.
  normality_results <- vapply(
    clean_data[measure_cols],
    function(x) {
      if (sum(!is.na(x)) < 3) {
        return(NA_real_)
      }
      shapiro.test(x)$p.value
    },
    numeric(1)
  )

  # Long format for Levene's test. Missing values are removed after the
  # numeric coercion above, so values that could not be parsed are dropped too.
  long_data <- melt(clean_data, id.vars = "Con", variable.name = "Concentration", value.name = "Value") %>%
    filter(!is.na(Value))

  # Levene's test for equality of variance across concentration levels
  levene_test <- leveneTest(Value ~ Concentration, data = long_data)

  list(
    normality_p_values = normality_results,
    levene_test = levene_test
  )
}

# Run the assumption checks (Shapiro-Wilk and Levene) for every worksheet.
# Each sheet's data frame is looked up by name with get(), so
# read_and_assign_sheets() must have been run first.
sheet_names <- excel_sheets("Data.xlsx")

for (sheet in sheet_names) {
  # Compute first, then print a header line followed by the result list
  test_result <- perform_tests(get(sheet))
  print(paste("Results for", sheet))
  print(test_result)
}
## [1] "Results for AIa"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.5633650 0.8521294 0.9569349 0.6875491 0.7476451 0.9609992 0.4311201 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  1.1244 0.3738
##       28               
## 
## [1] "Results for Arg"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.5601209 0.5497490 0.4985203 0.4846414 0.8414910 0.9061812 0.7623688 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  0.4331 0.8505
##       28               
## 
## [1] "Results for Asp"
## $normality_p_values
##       1000        500        100         50         10          5          1 
## 0.56092773 0.61274594 0.01063096 0.26294865 0.66713049 0.97872100 0.98559315 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  6    3.47 0.01086 *
##       28                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Aspg"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.1895405 0.4612731 0.7480872 0.6306162 0.3914674 0.9997378 0.5579808 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  0.3975 0.8743
##       28               
## 
## [1] "Results for Cys"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.4176463 0.5439844 0.3189247 0.7849049 0.9344402 0.6357983 0.7654510 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  1.1217 0.3753
##       28               
## 
## [1] "Results for Gln"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.7945513 0.2110115 0.5767714 0.4026715 0.9804080 0.9580997 0.6406464 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  6  2.9551 0.02311 *
##       28                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Glu"
## $normality_p_values
##       1000        500        100         50         10          5          1 
## 0.05905854 0.97397379 0.95039490 0.72586601 0.60842902 0.67993881 0.41082688 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  1.7057 0.1566
##       28               
## 
## [1] "Results for Gly"
## $normality_p_values
##       1000        500        100         50         10          5 
## 0.64977337 0.82996078 0.72502949 0.06877027 0.49841512 0.28417714 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  1.0254 0.4303
##       27               
## 
## [1] "Results for His"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.4015037 0.9562376 0.1896328 0.8976194 0.8852202 0.8465223 0.6661249 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  6  2.9201 0.02436 *
##       28                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Iso-Leu"
## $normality_p_values
##       1000        500        100         50         10          5          1 
## 0.24824949 0.83193675 0.15731281 0.98381524 0.36937051 0.08378289 0.25961350 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  6  2.1131 0.08341 .
##       28                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Leu"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.7246126 0.8555970 0.7771820 0.8814065 0.4985817 0.2186498 0.2491243 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  6  2.4986 0.04608 *
##       28                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Lys"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.5958671 0.4273715 0.3453429 0.4096900 0.6429562 0.9684607 0.5856278 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  0.7478 0.6162
##       28               
## 
## [1] "Results for Met"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.9837256 0.9342727 0.7228427 0.7532409 0.2495740 0.7592478 0.6621991 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  0.4206  0.859
##       28               
## 
## [1] "Results for Phe"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.4969510 0.6873629 0.5100695 0.8256046 0.3468916 0.8609741 0.4446205 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  6  2.3299 0.05969 .
##       28                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Pro"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.5314929 0.9404085 0.9792745 0.3100920 0.9206538 0.9611291 0.3715548 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  0.9772 0.4589
##       28               
## 
## [1] "Results for Ser"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.9517442 0.8950489 0.9919508 0.6675814 0.7835833 0.7966734 0.7496777 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  0.2053 0.9722
##       28               
## 
## [1] "Results for Thr"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.4245250 0.6199374 0.8493000 0.8251067 0.9814672 0.3021418 0.6025638 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  1.4121  0.245
##       28               
## 
## [1] "Results for Trp"
## $normality_p_values
##       1000        500        100         50         10          5          1 
## 0.29630735 0.89266526 0.94870419 0.96297362 0.73143774 0.88658559 0.07746806 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  0.2206 0.9669
##       28               
## 
## [1] "Results for Tyr"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.7789145 0.8180186 0.4158107 0.2857817 0.7472215 0.8260023 0.9818407 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  1.6522   0.17
##       28               
## 
## [1] "Results for Val"
## $normality_p_values
##      1000       500       100        50        10         5         1 
## 0.4859096 0.6683519 0.7178185 0.9972318 0.5979738 0.8478458 0.3870650 
## 
## $levene_test
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  6  1.6111 0.1811
##       28

# 1.2 Table 2

# One-way ANOVA of the measured values across concentration levels for one
# sheet.
#
# Arguments:
#   sheet_name    Name of the sheet; decides which test is run.
#   data          Data frame with a "Con" label column and one column per
#                 concentration level. "Average"/"STDEV" rows are excluded.
#   welch_sheets  Sheets analysed with Welch's ANOVA (unequal variances)
#                 instead of the classical ANOVA. The default lists the sheets
#                 whose Levene test was significant in this data set.
#
# Returns a list. For classical ANOVA: sheet, test_type, anova_result
# (summary of aov). For Welch's ANOVA: sheet, test_type, F_statistic, p_value,
# the classical between/within sums of squares and mean squares, df_between
# (numerator df) and df_within (Welch-adjusted denominator df).
perform_anova <- function(sheet_name, data,
                          welch_sheets = c("Asp", "Gln", "His", "Leu")) {
  stopifnot("Con" %in% names(data))

  # Exclude rows where "Con" is "Average" or "STDEV"
  clean_data <- data[!data$Con %in% c("Average", "STDEV"), , drop = FALSE]

  # Columns that are already numeric pass through untouched; text columns
  # (e.g. a column containing one non-numeric cell) are parsed.
  to_numeric <- function(x) {
    if (is.numeric(x)) x else as.numeric(as.character(x))
  }
  measure_cols <- setdiff(names(clean_data), "Con")

  # Long format: one row per observation, columns stacked in sheet order so
  # the factor levels follow the column order of the sheet.
  long_data <- data.frame(
    Concentration = factor(
      rep(measure_cols, each = nrow(clean_data)),
      levels = measure_cols
    ),
    Value = unlist(
      lapply(clean_data[measure_cols], to_numeric),
      use.names = FALSE
    )
  )

  # Drop missing values only after the numeric coercion, otherwise cells that
  # fail to parse would survive as NA and poison the sums below.
  long_data <- long_data[!is.na(long_data$Value), , drop = FALSE]
  long_data$Concentration <- droplevels(long_data$Concentration)

  if (sheet_name %in% welch_sheets) {
    # Perform Welch's ANOVA using oneway.test()
    welch_anova <- oneway.test(Value ~ Concentration, data = long_data, var.equal = FALSE)

    group_means <- tapply(long_data$Value, long_data$Concentration, mean)
    group_sizes <- tapply(long_data$Value, long_data$Concentration, length)
    overall_mean <- mean(long_data$Value)

    # Sum of Squares Between Groups (SSB)
    SSB <- sum(group_sizes * (group_means - overall_mean)^2)

    # Sum of Squares Within Groups (SSW). ave() gives every observation its
    # own group mean, so the result does not depend on the row order.
    SSW <- sum((long_data$Value - ave(long_data$Value, long_data$Concentration))^2)

    # Degrees of freedom. oneway.test() reports c("num df", "denom df"); the
    # within-group df is the Welch-adjusted *denominator* df.
    df_between <- nlevels(long_data$Concentration) - 1
    df_within <- welch_anova$parameter[["denom df"]]

    # NOTE(review): these are the classical sums of squares scaled by the
    # Welch df; the Welch F statistic is not MSB / MSW.
    MSB <- SSB / df_between
    MSW <- SSW / df_within

    result <- list(
      sheet = sheet_name,
      test_type = "Welch's ANOVA",
      F_statistic = welch_anova$statistic,
      p_value = welch_anova$p.value,
      Sum_Sq_Between = SSB,
      Mean_Sq_Between = MSB,
      Sum_Sq_Within = SSW,
      Mean_Sq_Within = MSW,
      df_between = df_between,
      df_within = df_within
    )
  } else {
    # Perform regular ANOVA
    anova_result <- aov(Value ~ Concentration, data = long_data)

    result <- list(
      sheet = sheet_name,
      test_type = "Regular ANOVA",
      anova_result = summary(anova_result)
    )
  }

  result
}

# Run the ANOVA for every worksheet and print each result under a header line.
for (sheet in sheet_names) {
  data <- get(sheet)  # Look up the global data frame named after the sheet (e.g., Asp, Gln, His)
  test_result <- perform_anova(sheet, data)
  
  # Print a header line, then the result list returned by perform_anova()
  print(paste("Results for", sheet))
  print(test_result)
}
## [1] "Results for AIa"
## $sheet
## [1] "AIa"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  30692    5115   851.5 <2e-16 ***
## Residuals     28    168       6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Arg"
## $sheet
## [1] "Arg"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  18102  3016.9   189.7 <2e-16 ***
## Residuals     28    445    15.9                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Asp"
## $sheet
## [1] "Asp"
## 
## $test_type
## [1] "Welch's ANOVA"
## 
## $F_statistic
##        F 
## 809.7435 
## 
## $p_value
## [1] 4.361223e-15
## 
## $Sum_Sq_Between
## [1] 102857.6
## 
## $Mean_Sq_Between
## [1] 17142.94
## 
## $Sum_Sq_Within
## [1] 291.8451
## 
## $Mean_Sq_Within
##   num df 
## 48.64085 
## 
## $df_between
## [1] 6
## 
## $df_within
## num df 
##      6 
## 
## [1] "Results for Aspg"
## $sheet
## [1] "Aspg"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  74035   12339    1282 <2e-16 ***
## Residuals     28    269      10                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Cys"
## $sheet
## [1] "Cys"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6 203266   33878    2087 <2e-16 ***
## Residuals     28    455      16                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Gln"
## $sheet
## [1] "Gln"
## 
## $test_type
## [1] "Welch's ANOVA"
## 
## $F_statistic
##        F 
## 1244.193 
## 
## $p_value
## [1] 3.198327e-16
## 
## $Sum_Sq_Between
## [1] 194427.3
## 
## $Mean_Sq_Between
## [1] 32404.55
## 
## $Sum_Sq_Within
## [1] 322.0786
## 
## $Mean_Sq_Within
##   num df 
## 53.67977 
## 
## $df_between
## [1] 6
## 
## $df_within
## num df 
##      6 
## 
## [1] "Results for Glu"
## $sheet
## [1] "Glu"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6 239702   39950    2843 <2e-16 ***
## Residuals     28    393      14                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Gly"
## $sheet
## [1] "Gly"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  23033    3839    1022 <2e-16 ***
## Residuals     27    101       4                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
## 
## [1] "Results for His"
## $sheet
## [1] "His"
## 
## $test_type
## [1] "Welch's ANOVA"
## 
## $F_statistic
##        F 
## 1455.179 
## 
## $p_value
## [1] 8.626972e-17
## 
## $Sum_Sq_Between
## [1] 138984.6
## 
## $Mean_Sq_Between
## [1] 23164.1
## 
## $Sum_Sq_Within
## [1] 404.4086
## 
## $Mean_Sq_Within
##   num df 
## 67.40143 
## 
## $df_between
## [1] 6
## 
## $df_within
## num df 
##      6 
## 
## [1] "Results for Iso-Leu"
## $sheet
## [1] "Iso-Leu"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  60077   10013    2425 <2e-16 ***
## Residuals     28    116       4                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Leu"
## $sheet
## [1] "Leu"
## 
## $test_type
## [1] "Welch's ANOVA"
## 
## $F_statistic
##        F 
## 956.2051 
## 
## $p_value
## [1] 1.946673e-15
## 
## $Sum_Sq_Between
## [1] 98358.64
## 
## $Mean_Sq_Between
## [1] 16393.11
## 
## $Sum_Sq_Within
## [1] 216.175
## 
## $Mean_Sq_Within
##   num df 
## 36.02916 
## 
## $df_between
## [1] 6
## 
## $df_within
## num df 
##      6 
## 
## [1] "Results for Lys"
## $sheet
## [1] "Lys"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  88281   14713   852.5 <2e-16 ***
## Residuals     28    483      17                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Met"
## $sheet
## [1] "Met"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  56195    9366   242.7 <2e-16 ***
## Residuals     28   1080      39                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Phe"
## $sheet
## [1] "Phe"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6 148641   24774    1383 <2e-16 ***
## Residuals     28    502      18                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Pro"
## $sheet
## [1] "Pro"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  31849    5308   924.4 <2e-16 ***
## Residuals     28    161       6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Ser"
## $sheet
## [1] "Ser"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  10728  1788.0   267.4 <2e-16 ***
## Residuals     28    187     6.7                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Thr"
## $sheet
## [1] "Thr"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  77954   12992    1488 <2e-16 ***
## Residuals     28    244       9                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Trp"
## $sheet
## [1] "Trp"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  31820    5303   256.3 <2e-16 ***
## Residuals     28    579      21                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Tyr"
## $sheet
## [1] "Tyr"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6 146631   24439    1861 <2e-16 ***
## Residuals     28    368      13                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [1] "Results for Val"
## $sheet
## [1] "Val"
## 
## $test_type
## [1] "Regular ANOVA"
## 
## $anova_result
##               Df Sum Sq Mean Sq F value Pr(>F)    
## Concentration  6  39166    6528   936.5 <2e-16 ***
## Residuals     28    195       7                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# 1.3 Table 3

###________________________
#regression
#_________________________
# Linear regression of the per-concentration means on concentration.
#
# Arguments:
#   data  Data frame with a "Con" label column and one column per
#         concentration level; the column names are the concentrations. It
#         must contain exactly one row labelled "Average", which supplies the
#         response values.
#
# Returns a list with
#   regression_summary  summary() of lm(Mean ~ Concentration)
#   RMSE                root mean squared residual of that fit
#
# Stops with an error when "Con" is missing or when there is not exactly one
# "Average" row.
perform_regression_on_means_with_rmse <- function(data) {
  stopifnot(is.data.frame(data), "Con" %in% names(data))

  # %in% never yields NA, so rows with a missing label are simply not selected
  average_row <- data[data$Con %in% "Average", , drop = FALSE]
  if (nrow(average_row) != 1L) {
    stop(
      "Expected exactly one row with Con == \"Average\", found ",
      nrow(average_row),
      call. = FALSE
    )
  }

  # Select the measurement columns by name rather than by position, so the
  # result does not depend on "Con" being the first column.
  measure_cols <- setdiff(names(data), "Con")
  concentration_values <- as.numeric(measure_cols)

  # Numeric cells pass through untouched; text cells are parsed.
  to_numeric <- function(x) {
    if (is.numeric(x)) x else as.numeric(as.character(x))
  }
  # unname() keeps the default 1..n row names in the regression data, which
  # is what the printed residual table uses as labels.
  y_values <- unname(vapply(average_row[measure_cols], to_numeric, numeric(1)))

  # Create a data frame for regression
  regression_data <- data.frame(Concentration = concentration_values, Mean = y_values)

  # Perform linear regression: Mean (Y) ~ Concentration (X)
  lm_model <- lm(Mean ~ Concentration, data = regression_data)

  # RMSE from the model's own residuals. lm() drops incomplete rows, so
  # subtracting predict() from the full Mean column would misalign whenever a
  # mean is missing.
  rmse <- sqrt(mean(residuals(lm_model)^2))

  list(regression_summary = summary(lm_model), RMSE = rmse)
}

# Fit the mean-vs-concentration regression for every worksheet and print the
# model summary followed by the RMSE.
for (sheet in sheet_names) {
  data <- get(sheet)  # Look up the global data frame named after the sheet (e.g., AIa, Asp)
  
  # Returns a list with elements regression_summary and RMSE
  regression_result <- perform_regression_on_means_with_rmse(data)
  
  # Print the regression summary and RMSE for the current sheet
  print(paste("Regression results for sheet:", sheet))
  print(regression_result$regression_summary)
  print(paste("RMSE for sheet", sheet, ":", regression_result$RMSE))
}
## [1] "Regression results for sheet: AIa"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##      1      2      3      4      5      6      7 
## -7.254 13.466  4.930  1.697 -2.839 -4.673 -5.327 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   39.945123   3.647372  10.952 0.000110 ***
## Concentration  0.081966   0.008588   9.544 0.000214 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.992 on 5 degrees of freedom
## Multiple R-squared:  0.948,  Adjusted R-squared:  0.9376 
## F-statistic: 91.09 on 1 and 5 DF,  p-value: 0.0002137
## 
## [1] "RMSE for sheet AIa : 6.75485748018442"
## [1] "Regression results for sheet: Arg"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -7.4488 13.9944  4.6878  0.8687 -3.5373 -4.1788 -4.3860 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   90.40034    3.66943  24.636 2.05e-06 ***
## Concentration  0.06170    0.00864   7.141 0.000836 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.041 on 5 degrees of freedom
## Multiple R-squared:  0.9107, Adjusted R-squared:  0.8928 
## F-statistic:    51 on 1 and 5 DF,  p-value: 0.0008362
## 
## [1] "RMSE for sheet Arg : 6.79570909396087"
## [1] "Regression results for sheet: Asp"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -19.185  38.164   4.586  -5.174  -5.982  -6.079  -6.330 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   23.27703    9.09311   2.560  0.05066 . 
## Concentration  0.14649    0.02141   6.842  0.00102 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.93 on 5 degrees of freedom
## Multiple R-squared:  0.9035, Adjusted R-squared:  0.8842 
## F-statistic: 46.81 on 1 and 5 DF,  p-value: 0.001018
## 
## [1] "RMSE for sheet Asp : 16.840244750102"
## [1] "Regression results for sheet: Aspg"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##      1      2      3      4      5      6      7 
## -14.81  21.80  29.39  25.21 -15.35 -21.59 -24.65 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   55.20319   12.08001   4.570   0.0060 **
## Concentration  0.11424    0.02844   4.016   0.0102 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.47 on 5 degrees of freedom
## Multiple R-squared:  0.7634, Adjusted R-squared:  0.7161 
## F-statistic: 16.13 on 1 and 5 DF,  p-value: 0.01016
## 
## [1] "RMSE for sheet Aspg : 22.3719321190439"
## [1] "Regression results for sheet: Cys"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -24.152  45.120  14.894   6.095 -11.422 -14.384 -16.151 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   41.60206   12.03304   3.457 0.018094 *  
## Concentration  0.20718    0.02833   7.312 0.000749 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.37 on 5 degrees of freedom
## Multiple R-squared:  0.9145, Adjusted R-squared:  0.8974 
## F-statistic: 53.47 on 1 and 5 DF,  p-value: 0.0007494
## 
## [1] "RMSE for sheet Cys : 22.2849355741498"
## [1] "Regression results for sheet: Gln"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -19.275  37.571   7.670  -3.200  -7.140  -7.782  -7.845 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   15.81075    9.18449   1.721 0.145787    
## Concentration  0.20630    0.02163   9.539 0.000214 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.13 on 5 degrees of freedom
## Multiple R-squared:  0.9479, Adjusted R-squared:  0.9375 
## F-statistic:    91 on 1 and 5 DF,  p-value: 0.0002142
## 
## [1] "RMSE for sheet Gln : 17.0094880026335"
## [1] "Regression results for sheet: Glu"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -26.018  51.556   5.174  -2.679  -8.535  -9.618  -9.880 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   17.59558   12.29935   1.431 0.211945    
## Concentration  0.22618    0.02896   7.810 0.000552 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.95 on 5 degrees of freedom
## Multiple R-squared:  0.9242, Adjusted R-squared:  0.9091 
## F-statistic:    61 on 1 and 5 DF,  p-value: 0.0005516
## 
## [1] "RMSE for sheet Glu : 22.7781406860965"
## [1] "Regression results for sheet: Gly"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##      1      2      3      4      5      6      7 
## -9.219 18.355  1.882 -1.986 -2.895 -3.087 -3.050 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   24.09999    4.36086   5.526  0.00266 **
## Concentration  0.06988    0.01027   6.806  0.00104 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.556 on 5 degrees of freedom
## Multiple R-squared:  0.9026, Adjusted R-squared:  0.8831 
## F-statistic: 46.32 on 1 and 5 DF,  p-value: 0.001043
## 
## [1] "RMSE for sheet Gly : 8.07622178238542"
## [1] "Regression results for sheet: His"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -20.920  41.148   7.466  -5.706  -7.100  -7.412  -7.476 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   67.64517    9.95724   6.794 0.001052 ** 
## Concentration  0.17130    0.02345   7.307 0.000752 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.82 on 5 degrees of freedom
## Multiple R-squared:  0.9144, Adjusted R-squared:  0.8972 
## F-statistic: 53.39 on 1 and 5 DF,  p-value: 0.0007521
## 
## [1] "RMSE for sheet His : 18.4406070271489"
## [1] "Regression results for sheet: Iso-Leu"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -13.074  24.666   7.398   2.071  -5.896  -7.053  -8.112 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   13.18118    6.41718   2.054  0.09515 .  
## Concentration  0.11283    0.01511   7.468  0.00068 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.06 on 5 degrees of freedom
## Multiple R-squared:  0.9177, Adjusted R-squared:  0.9013 
## F-statistic: 55.76 on 1 and 5 DF,  p-value: 0.0006798
## 
## [1] "RMSE for sheet Iso-Leu : 11.8844799218731"
## [1] "Regression results for sheet: Leu"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -17.318  34.535   2.314  -1.803  -5.508  -5.750  -6.469 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   19.42208    8.17985   2.374 0.063606 .  
## Concentration  0.14442    0.01926   7.499 0.000667 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.92 on 5 degrees of freedom
## Multiple R-squared:  0.9183, Adjusted R-squared:  0.902 
## F-statistic: 56.23 on 1 and 5 DF,  p-value: 0.0006669
## 
## [1] "RMSE for sheet Leu : 15.1489222842836"
## [1] "Regression results for sheet: Lys"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -19.148  35.897  12.357   2.439  -9.350 -10.575 -11.620 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   64.77806    9.45907   6.848  0.00101 **
## Concentration  0.13381    0.02227   6.008  0.00184 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.73 on 5 degrees of freedom
## Multiple R-squared:  0.8783, Adjusted R-squared:  0.854 
## F-statistic:  36.1 on 1 and 5 DF,  p-value: 0.001835
## 
## [1] "RMSE for sheet Lys : 17.5180036336255"
## [1] "Regression results for sheet: Met"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##      1      2      3      4      5      6      7 
## -8.429 16.117  3.011  2.802 -4.325 -4.622 -4.553 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.480e+02  4.125e+00   35.89 3.16e-07 ***
## Concentration 1.118e-01  9.713e-03   11.51 8.67e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.039 on 5 degrees of freedom
## Multiple R-squared:  0.9637, Adjusted R-squared:  0.9564 
## F-statistic: 132.6 on 1 and 5 DF,  p-value: 8.666e-05
## 
## [1] "RMSE for sheet Met : 7.63936633688684"
## [1] "Regression results for sheet: Phe"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -19.172  34.100  20.020   6.385 -11.408 -13.894 -16.032 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   38.37437   10.31016   3.722 0.013684 *  
## Concentration  0.17714    0.02428   7.297 0.000757 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.59 on 5 degrees of freedom
## Multiple R-squared:  0.9142, Adjusted R-squared:  0.897 
## F-statistic: 53.24 on 1 and 5 DF,  p-value: 0.0007569
## 
## [1] "RMSE for sheet Phe : 19.0942111872395"
## [1] "Regression results for sheet: Pro"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -10.743  17.058  18.999   9.683  -9.394 -12.471 -13.132 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   30.55712    7.29396   4.189  0.00858 **
## Concentration  0.07668    0.01717   4.465  0.00661 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.98 on 5 degrees of freedom
## Multiple R-squared:  0.7995, Adjusted R-squared:  0.7594 
## F-statistic: 19.93 on 1 and 5 DF,  p-value: 0.006611
## 
## [1] "RMSE for sheet Pro : 13.5082733134469"
## [1] "Regression results for sheet: Ser"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##      1      2      3      4      5      6      7 
## -6.343 10.750  8.333  4.370 -4.780 -5.893 -6.437 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   47.359338   3.781670  12.523 5.76e-05 ***
## Concentration  0.045617   0.008904   5.123   0.0037 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.287 on 5 degrees of freedom
## Multiple R-squared:   0.84,  Adjusted R-squared:  0.808 
## F-statistic: 26.25 on 1 and 5 DF,  p-value: 0.003698
## 
## [1] "RMSE for sheet Ser : 7.00357552494693"
## [1] "Regression results for sheet: Thr"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -11.708  19.097  20.341   5.779  -9.578 -10.921 -13.009 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   31.80307    7.44048   4.274 0.007906 ** 
## Concentration  0.12832    0.01752   7.325 0.000744 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.3 on 5 degrees of freedom
## Multiple R-squared:  0.9147, Adjusted R-squared:  0.8977 
## F-statistic: 53.65 on 1 and 5 DF,  p-value: 0.0007436
## 
## [1] "RMSE for sheet Thr : 13.7796237009493"
## [1] "Regression results for sheet: Trp"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##        1        2        3        4        5        6        7 
## -10.7926  21.6908  -0.3616   0.7963  -3.2713  -3.9285  -4.1331 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   109.77974    5.12642   21.41 4.12e-06 ***
## Concentration   0.08136    0.01207    6.74  0.00109 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.23 on 5 degrees of freedom
## Multiple R-squared:  0.9009, Adjusted R-squared:  0.881 
## F-statistic: 45.43 on 1 and 5 DF,  p-value: 0.00109
## 
## [1] "RMSE for sheet Trp : 9.4940183703731"
## [1] "Regression results for sheet: Tyr"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -13.963  24.903  15.054   2.973  -8.422  -9.533 -11.012 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   41.45233    7.45317   5.562 0.002585 ** 
## Concentration  0.17978    0.01755  10.244 0.000152 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.33 on 5 degrees of freedom
## Multiple R-squared:  0.9545, Adjusted R-squared:  0.9454 
## F-statistic: 104.9 on 1 and 5 DF,  p-value: 0.0001522
## 
## [1] "RMSE for sheet Tyr : 13.8031186392565"
## [1] "Regression results for sheet: Val"
## 
## Call:
## lm(formula = Mean ~ Concentration, data = regression_data)
## 
## Residuals:
##       1       2       3       4       5       6       7 
##  -8.949  11.573  24.205  19.073 -11.117 -16.295 -18.490 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   31.34478    8.88338   3.528   0.0168 *
## Concentration  0.08280    0.02092   3.959   0.0108 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.47 on 5 degrees of freedom
## Multiple R-squared:  0.7581, Adjusted R-squared:  0.7098 
## F-statistic: 15.67 on 1 and 5 DF,  p-value: 0.01076
## 
## [1] "RMSE for sheet Val : 16.4518448713978"

1.4 Figure 1

rm(list=ls())
# Load necessary libraries
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(openxlsx)
library(extrafont)
library(reshape2)
library(car) 
library(cowplot)
library(gridExtra)

# Register fonts with the Windows graphics device. The "win" device is
# Windows-specific, so the call is skipped on other platforms rather than
# letting the whole script stop here.
if (.Platform$OS.type == "windows") {
  loadfonts(device = "win")
}

#_________________
# Read data
#_______________________
# Single definition of the workbook path; everything below derives from it.
file_path <- "Data.xlsx"
sheet_names <- excel_sheets(file_path)

#' Read every sheet of a workbook into the global environment.
#'
#' Each sheet is read with its first row as column names and stored in
#' `.GlobalEnv` under the sheet's own name. Rows whose `Con` label is missing
#' are labelled "rep1", "rep2", ... in order of appearance.
#'
#' @param file_path Path to the Excel workbook.
#' @return The sheet names, invisibly. Called for its side effect of creating
#'   one global data frame per sheet.
read_and_assign_sheets <- function(file_path) {
  workbook_sheets <- excel_sheets(file_path)
  for (sheet in workbook_sheets) {
    data <- read.xlsx(file_path, sheet = sheet, colNames = TRUE)
    # Unlabelled rows get sequential "rep<n>" labels.
    unlabelled <- is.na(data$Con)
    data$Con[unlabelled] <- paste0("rep", seq_len(sum(unlabelled)))
    assign(sheet, data, envir = .GlobalEnv)
  }
  invisible(workbook_sheets)
}

read_and_assign_sheets(file_path)

#__________________________________________
#' Build the two-panel regression figure for one sheet.
#'
#' Left panel: a linear fit through rows 4-7 of the "Average" values, drawn on
#' a 0-50 concentration axis together with a small table holding the fitted
#' equation, R² and RMSE. Right panel: linear and loess fits through all
#' averaged points with a table comparing R² and RMSE of the two models.
#' The individual repetitions are overlaid on both panels.
#'
#' @param data Data frame for one sheet. The first column is `Con` (row
#'   labels, including "Average" and "STDEV"); the remaining column names are
#'   parsed as numeric concentrations.
#' @param sheet_name Sheet label, used in the panel titles and to choose the
#'   position of the annotation table and legend.
#' @return The cowplot object holding both panels side by side.
plot_regression_separate <- function(data, sheet_name) {
  # ---- Shared styling (identical for both panels) -------------------------
  point_colors <- c("Average Points" = "navyblue", "Repetition Points" = "#B8860B")

  table_theme <- ttheme_minimal(
    core = list(
      fg_params = list(fontface = "bold", fontsize = 20, col = "black"),
      bg_params = list(fill = "white", col = "black", lwd = 1)
    ),
    colhead = list(fg_params = list(col = "black", fontface = "bold", fontsize = 20))
  )

  # The two panels differ only in where the legend sits.
  panel_theme <- function(legend_xy) {
    theme(
      plot.title = element_text(size = 30, family = "Times New Roman", face = "bold"),
      axis.text = element_text(size = 30, family = "Times New Roman", face = "bold"),
      axis.title.x = element_text(size = 30, family = "Times New Roman", face = "bold"),
      axis.title.y = element_text(size = 30, family = "Times New Roman", face = "bold", margin = margin(r = 60)),
      legend.text = element_text(size = 25, family = "Times New Roman", face = "bold"),
      legend.title = element_blank(),
      legend.position = legend_xy,
      legend.direction = "vertical",
      panel.background = element_blank(),
      plot.background = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", fill = NA)
    )
  }

  # ---- Data ----------------------------------------------------------------
  # One (Concentration, Mean) pair per concentration column, taken from the
  # "Average" row.
  average_row <- data %>% filter(Con == "Average")
  regression_data <- data.frame(
    Concentration = as.numeric(names(data)[-1]),
    Mean = as.numeric(average_row[-1])
  )

  # Every row that is not a summary row is a repetition; reshape to long form.
  repetition_data <- data %>%
    filter(!Con %in% c("Average", "STDEV")) %>%
    gather(key = "Concentration", value = "Value", -Con) %>%
    mutate(Concentration = as.numeric(Concentration),
           Value = as.numeric(Value))

  # Repetitions shown on the left panel (its x axis spans 0-50).
  first_points <- repetition_data %>%
    filter(Concentration >= 1 & Concentration <= 50)

  # ---- Left panel: fit through rows 4-7 -----------------------------------
  # NOTE(review): the subset is chosen by row position, not by concentration.
  # It presumably corresponds to the four concentrations in the 1-50 range
  # that the panel plots -- confirm against the column order of the workbook.
  first_part <- regression_data[4:7, ]
  lm_model_first <- lm(Mean ~ Concentration, data = first_part)

  rmse_first <- sqrt(mean((first_part$Mean - predict(lm_model_first))^2))
  r_squared_first <- summary(lm_model_first)$r.squared
  intercept_first <- coef(lm_model_first)[1]
  slope_first <- coef(lm_model_first)[2]

  # Sign-aware equation so a negative intercept prints as "X - 3.2" instead
  # of "X + -3.2". Non-negative intercepts render exactly as before.
  intercept_sign <- if (isTRUE(intercept_first < 0)) "- " else "+ "
  equation_label_first <- paste0(
    "Y = ", round(slope_first, 2), "X ",
    intercept_sign, abs(round(intercept_first, 2))
  )
  r_squared_label_first <- paste0("R² = ", round(r_squared_first, 3))
  rmse_label_first <- paste0("RMSE = ", round(rmse_first, 3))

  table1 <- tableGrob(
    data.frame(Values = c(equation_label_first, r_squared_label_first, rmse_label_first)),
    rows = NULL,
    cols = NULL,
    theme = table_theme
  )

  # Sheet-specific placement of the annotation table (data coordinates).
  table_box <- if (sheet_name %in% c("Met", "Trp", "Aspg", "Arg", "Lys", "His")) {
    list(xmin = 30, xmax = 40, ymin = 20, ymax = 40)
  } else {
    list(xmin = 10, xmax = 20, ymin = 80, ymax = 90)
  }

  # "Met" gets a lower legend than the other sheets.
  legend_xy_first <- if (sheet_name == "Met") c(0.1, 0.7) else c(0.1, 0.85)

  # Scale limits are hard limits: ggplot2 discards values outside them before
  # drawing points or fitting the smoother.
  p1 <- ggplot(first_part, aes(x = Concentration, y = Mean)) +
    geom_point(aes(x = Concentration, y = Mean, color = "Average Points"), size = 10) +
    geom_point(data = first_points, aes(x = Concentration, y = Value, color = "Repetition Points"), alpha = 0.6, size = 5) +
    geom_smooth(method = "lm", se = FALSE, color = "navyblue", size = 3.5) +
    labs(title = paste(sheet_name, "- First 4 Points")) +
    scale_color_manual(values = point_colors) +
    scale_y_continuous(limits = c(0, 160), breaks = seq(0, 160, 30)) +
    scale_x_continuous(breaks = seq(0, 50, 10), limits = c(0, 50)) +
    theme_minimal() +
    panel_theme(legend_xy_first) +
    annotation_custom(
      grob = table1,
      xmin = table_box$xmin,
      xmax = table_box$xmax,
      ymin = table_box$ymin,
      ymax = table_box$ymax
    )

  # ---- Right panel: linear vs. loess over all averaged points -------------
  lm_model <- lm(Mean ~ Concentration, data = regression_data)
  lm_rmse <- sqrt(mean((regression_data$Mean - predict(lm_model))^2))
  lm_r_squared <- summary(lm_model)$r.squared

  loess_model <- loess(Mean ~ Concentration, data = regression_data)
  loess_residuals <- regression_data$Mean - predict(loess_model)
  loess_rmse <- sqrt(mean(loess_residuals^2))

  # Pseudo R² for loess: 1 - SSE / SST.
  sst <- sum((regression_data$Mean - mean(regression_data$Mean))^2)
  loess_r_squared <- 1 - (sum(loess_residuals^2) / sst)

  # Comparison table (no equation row: loess has no closed-form equation).
  table_data2 <- data.frame(
    LinearModel = c(
      paste0("R² = ", round(lm_r_squared, 3)),
      paste0("RMSE = ", round(lm_rmse, 3))
    ),
    LoessModel = c(
      paste0("R² = ", round(loess_r_squared, 3)),
      paste0("RMSE = ", round(loess_rmse, 3))
    )
  )
  table2 <- tableGrob(table_data2, rows = NULL, theme = table_theme)

  p2 <- ggplot(regression_data, aes(x = Concentration, y = Mean)) +
    geom_point(data = regression_data, aes(x = Concentration, y = Mean, color = "Average Points"), size = 10) +
    geom_point(data = repetition_data, aes(x = Concentration, y = Value, color = "Repetition Points"), alpha = 0.6, size = 5) +
    geom_smooth(method = "lm", se = FALSE, color = "navyblue", size = 2) +
    geom_smooth(method = "loess", se = FALSE, color = "navyblue", linetype = "dashed", size = 3.5) +
    labs(title = paste(sheet_name, "- All Points")) +
    theme_minimal() +
    scale_x_continuous(breaks = seq(0, 1000, 200), limits = c(0, 1000)) +
    scale_y_continuous(limits = c(0, 260), breaks = seq(0, 260, 50)) +
    scale_color_manual(values = point_colors) +
    panel_theme(c(0.08, 0.85)) +
    annotation_custom(
      grob = table2,
      xmin = 750,
      xmax = 850,
      ymin = 35,
      ymax = 45
    )

  # ---- Combine -------------------------------------------------------------
  # Left panel gets half the width of the right one; a border frames the pair.
  plot_grid(p1, p2, ncol = 2, rel_widths = c(0.5, 1), align = "v", axis = "lr") +
    theme(plot.margin = margin(10, 10, 10, 10),
          plot.background = element_rect(color = "black", size = 2))
}

# One two-panel regression figure per sheet, keyed by sheet name. The sheet
# data frames live in the global environment (see read_and_assign_sheets).
plots <- lapply(sheet_names, function(sheet) {
  plot_regression_separate(get(sheet, envir = .GlobalEnv), sheet)
})
names(plots) <- sheet_names

# Lay the figures out two per row. The row count is derived from the number
# of sheets so no figure is left without a grid cell; the image height scales
# with it so every row keeps the height it has in the 11-row layout
# (50 in / 11 rows).
n_cols <- 2
n_rows <- ceiling(length(plots) / n_cols)
combined_plot <- plot_grid(plotlist = plots, ncol = n_cols, nrow = n_rows)
ggsave(
  "combined_plot.png",
  plot = combined_plot,
  width = 40,
  height = 50 * n_rows / 11,
  units = "in",
  limitsize = FALSE
)

knitr::include_graphics("combined_plot.png")

1.5 Figure 2

#___________________
#barplot
#____________________
#' Collect the per-concentration mean and standard deviation of every sheet.
#'
#' For each sheet name the matching data frame is looked up with `get()`, its
#' "Average" and "STDEV" rows are reshaped to long format and joined on
#' concentration, and the sheet name is stored in a `Variable` column.
#'
#' @param sheet_names Character vector of data-frame names to look up.
#' @return A data frame with columns `Concentration`, `Mean`, `STDEV` and
#'   `Variable`; an empty data frame when `sheet_names` is empty.
combine_data_for_individual_barplots <- function(sheet_names) {
  per_sheet <- lapply(sheet_names, function(sheet) {
    data <- get(sheet)  # Fetch the data frame (e.g., AIa, Asp, etc.)

    # "Average" row -> one (Concentration, Mean) pair per column
    mean_data <- data %>%
      filter(Con == "Average") %>%
      gather(key = "Concentration", value = "Mean", -Con) %>%
      mutate(Concentration = as.numeric(Concentration),
             Mean = as.numeric(Mean))

    # "STDEV" row -> one (Concentration, STDEV) pair per column
    stdev_data <- data %>%
      filter(Con == "STDEV") %>%
      gather(key = "Concentration", value = "STDEV", -Con) %>%
      mutate(Concentration = as.numeric(Concentration),
             STDEV = as.numeric(STDEV))

    # Pair each mean with its standard deviation and tag the sheet
    mean_data %>%
      select(-Con) %>%
      left_join(stdev_data %>% select(-Con), by = "Concentration") %>%
      mutate(Variable = sheet)
  })

  # Bind once instead of growing a data frame inside the loop. The leading
  # empty data frame keeps the result a data.frame when there are no sheets.
  do.call(rbind, c(list(data.frame()), per_sheet))
}

#' Bar plot of mean response per concentration with +/- STDEV error bars.
#'
#' @param data Data frame with columns `Concentration`, `Mean` and `STDEV`
#'   for a single variable.
#' @param variable_name Label of the variable; used as the plot title and
#'   inside the x-axis label, "[<variable_name>] (µM)".
#' @return A ggplot object.
create_individual_barplot <- function(data, variable_name) {
  # The x-axis label names the plotted variable. It was previously fixed to
  # "[Asp]" on every panel regardless of which variable was drawn.
  x_label <- bquote(bold(paste("[", .(variable_name), "] (", mu, "M)")))

  p <- ggplot(data, aes(x = factor(Concentration), y = Mean)) +
    geom_bar(stat = "identity", fill = "#B8860B", width = 0.7) +  # Bars show the means
    geom_errorbar(aes(ymin = Mean - STDEV, ymax = Mean + STDEV), width = 0.4, color = "navyblue") +  # Mean +/- STDEV
    labs(title = variable_name,
         x = x_label,
         y = expression(bold(Delta~"admittance ("*mu*"A/V)"))) +
    theme_minimal() +
    scale_y_continuous(limits = c(0, 260), breaks = seq(0, 260, 50)) +  # Fixed y range shared by all panels
    theme(
      plot.title = element_text(size = 20, family = "Times New Roman", face = "bold"),
      axis.text = element_text(size = 20, family = "Times New Roman", face = "bold"),
      axis.title.x = element_text(size = 20, family = "Times New Roman", face = "bold"),
      axis.title.y = element_text(size = 20, family = "Times New Roman", face = "bold", margin = margin(r = 30)),
      legend.text = element_text(size = 15, family = "Times New Roman", face = "bold"),
      legend.title = element_blank(),
      legend.position = NULL,
      legend.direction = "vertical",
      panel.background = element_blank(),
      plot.background = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", fill = NA)
    )
  p
}

#' Draw one bar plot per variable and arrange them in a four-column grid.
#'
#' @param sheet_names Character vector of sheet (data-frame) names.
#' @return A cowplot grid containing one bar plot per variable, in order of
#'   first appearance.
create_combined_barplots <- function(sheet_names) {
  all_stats <- combine_data_for_individual_barplots(sheet_names)

  # One panel per variable, keyed by the variable name
  variables <- unique(all_stats$Variable)
  panels <- lapply(variables, function(current) {
    create_individual_barplot(filter(all_stats, Variable == current), current)
  })
  names(panels) <- variables

  # ncol controls how many panels sit in each row
  cowplot::plot_grid(plotlist = panels, ncol = 4)
}

# Render the bar-plot grid, write it to disk and embed the image.
combined_barplots <- create_combined_barplots(sheet_names)
ggsave(
  filename = "combined_individual_barplots.png",
  plot = combined_barplots,
  width = 20,
  height = 17
)
knitr::include_graphics("combined_individual_barplots.png")

1.6 Figure 3

###_____________________
#boxplot
#________________________
#' Stack the "Average" row of every sheet into one long data frame.
#'
#' For each sheet name the matching data frame is looked up with `get()`, its
#' "Average" row is reshaped to one row per concentration, and the sheet name
#' is stored in a `Variable` column. `Value` is left as produced by the
#' reshape (not converted to numeric here).
#'
#' @param sheet_names Character vector of data-frame names to look up.
#' @return A data frame with columns `Con`, `Concentration`, `Value` and
#'   `Variable`; an empty data frame when `sheet_names` is empty.
combine_all_data <- function(sheet_names) {
  per_sheet <- lapply(sheet_names, function(sheet) {
    # Keep only the "Average" row and reshape it to long format
    average_long <- get(sheet) %>%
      filter(Con == "Average") %>%
      gather(key = "Concentration", value = "Value", -Con) %>%
      mutate(Concentration = as.numeric(Concentration))

    # Label the rows with the sheet they came from
    average_long$Variable <- sheet
    average_long
  })

  # Bind once instead of growing a data frame inside the loop. The leading
  # empty data frame keeps the result a data.frame when there are no sheets.
  do.call(rbind, c(list(data.frame()), per_sheet))
}

#' Box plot of `Value` per `Variable` with jittered, concentration-coloured
#' points.
#'
#' One box is drawn per distinct `Variable`. Every row of `combined_data` is
#' also drawn as a jittered point coloured by `Concentration`. As used in this
#' script the input comes from `combine_all_data()`, so each point is the
#' "Average" value of one concentration rather than an individual repetition.
#'
#' @param combined_data Data frame with columns `Variable`, `Value` and a
#'   numeric `Concentration`.
#' @return A ggplot object.
create_combined_boxplot <- function(combined_data) {
  # `Value` may arrive as character after reshaping; plot it as numeric
  combined_data$Value <- as.numeric(combined_data$Value)

  p <- ggplot(combined_data, aes(x = Variable, y = Value)) +
    geom_boxplot(outlier.shape = NA, fill = NA) +  # Outliers hidden: every point is drawn by the jitter layer
    geom_jitter(aes(color = Concentration), width = 0.2, size = 4) +
    scale_color_gradient(low = "#B8860B", high = "navyblue", name = "Concentration") +  # Gradient color scale
    labs(
      x = "Variable",
      y = expression(bold(Delta~"admittance ("*mu*"A/V)"))
    ) +
    theme_minimal() +
    scale_y_continuous(limits = c(0, 260), breaks = seq(0, 260, 50)) +  # Fixed y range
    theme(
      plot.title = element_text(size = 20, family = "Times New Roman", face = "bold"),
      axis.text = element_text(size = 20, family = "Times New Roman", face = "bold"),
      axis.title.y = element_text(size = 20, family = "Times New Roman", face = "bold"),
      axis.title.x = element_text(size = 20, family = "Times New Roman", face = "bold"),
      legend.text = element_text(size = 20, family = "Times New Roman", face = "bold"),
      legend.title = element_blank(),
      legend.position = c(0.1, 0.8),
      legend.direction = "vertical",
      panel.background = element_blank(),
      plot.background = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", fill = NA)
    )

  p
}

# Stack the "Average" rows of all sheets into a single long data frame
combined_data <- combine_all_data(sheet_names)

# Draw the combined box plot (one box per sheet), save it and embed the image
combined_boxplot <- create_combined_boxplot(combined_data)
ggsave(
  filename = "combined_boxplot.png",
  plot = combined_boxplot,
  width = 16,
  height = 8
)
knitr::include_graphics("combined_boxplot.png")